package com.creator.jobmatcherresume.tools;


import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * @Author: Hank.GUO
 * @Date: 2025/3/25 22:07
 * @Email: guohaihang0512@163.com
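 * @Description Text preprocessing helper: replaces non-alphanumeric characters with spaces, lower-cases the text, and extracts a set of keywords with stop words removed.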
 */
public class TextPreprocessor {

    /** Matches every character that is not an ASCII letter or digit. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-zA-Z0-9]");

    /** Matches a run of one or more whitespace characters; used to tokenize cleaned text. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Words that are never reported as keywords. Immutable, so it is shared by all instances. */
    private static final Set<String> STOP_WORDS = loadStopWords();

    /**
     * Normalizes raw text: every character other than an ASCII letter or digit is replaced
     * by a single space, and the result is lower-cased.
     *
     * <p>Runs of separators are not collapsed, so the result may contain consecutive,
     * leading, or trailing spaces.
     *
     * @param text the text to normalize; must not be {@code null}
     * @return the normalized text, same length as the input
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public String cleanText(String text) {
        Objects.requireNonNull(text, "text");
        // Locale.ROOT keeps lower-casing independent of the JVM default locale
        // (e.g. under a Turkish locale "I" would otherwise become dotless "ı").
        return NON_ALPHANUMERIC.matcher(text).replaceAll(" ").toLowerCase(Locale.ROOT);
    }

    /**
     * Extracts the distinct keywords of a text: the text is normalized with
     * {@link #cleanText(String)}, split on whitespace, and stop words are dropped.
     *
     * @param text the text to extract keywords from; must not be {@code null}
     * @return the set of lower-cased keywords; empty if the text contains no
     *         alphanumeric characters or only stop words
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public Set<String> extractKeywords(String text) {
        String cleaned = cleanText(text);
        return Arrays.stream(WHITESPACE.split(cleaned))
                // Splitting yields an empty token for empty input or a leading separator;
                // an empty string is not a keyword.
                .filter(word -> !word.isEmpty())
                .filter(word -> !STOP_WORDS.contains(word))
                .collect(Collectors.toSet());
    }

    /**
     * Loads the stop-word list (sample list).
     *
     * @return an immutable set of lower-case stop words
     */
    private static Set<String> loadStopWords() {
        return Set.of("the", "and", "with", "for");
    }
}
